Loading dataset

# Read the raw telecom customer data; read.csv() already returns a data frame.
Telecom_Data <- read.csv("Telecom Data.csv")
# Number of columns in the dataset.
ncol(Telecom_Data)
## [1] 58
# Number of rows in the dataset.
nrow(Telecom_Data)
## [1] 51047

There are a total of 58 columns and 51,047 rows.

Converting a few columns to factors

# Recode the churn flag, credit rating and occupation columns as factors.
Telecom_Data <- transform(
  Telecom_Data,
  Churn = factor(Churn),
  CreditRating = factor(CreditRating),
  Occupation = factor(Occupation)
)

Let’s check for null values

library(dplyr)
library(tidyr)
# Count the missing (NA) values in every column of the dataset.
# is.na() is the right test here: is.null() only checks whether the object
# itself is NULL, not whether individual cells are missing.
# vapply() is used instead of sapply() so the result is always a named integer
# vector, regardless of how many columns the data frame has.
null_values <- vapply(
  Telecom_Data,
  function(column) sum(is.na(column)),
  FUN.VALUE = integer(1)
)
null_values
##                CustomerID                     Churn            MonthlyRevenue 
##                         0                         0                       156 
##            MonthlyMinutes      TotalRecurringCharge     DirectorAssistedCalls 
##                       156                       156                       156 
##            OverageMinutes              RoamingCalls         PercChangeMinutes 
##                       156                       156                       367 
##        PercChangeRevenues              DroppedCalls              BlockedCalls 
##                       367                         0                         0 
##           UnansweredCalls         CustomerCareCalls             ThreewayCalls 
##                         0                         0                         0 
##             ReceivedCalls             OutboundCalls              InboundCalls 
##                         0                         0                         0 
##            PeakCallsInOut         OffPeakCallsInOut       DroppedBlockedCalls 
##                         0                         0                         0 
##       CallForwardingCalls          CallWaitingCalls           MonthsInService 
##                         0                         0                         0 
##                UniqueSubs                ActiveSubs               ServiceArea 
##                         0                         0                         0 
##                  Handsets             HandsetModels      CurrentEquipmentDays 
##                         1                         1                         1 
##                    AgeHH1                    AgeHH2              ChildrenInHH 
##                       909                       909                         0 
##        HandsetRefurbished         HandsetWebCapable                TruckOwner 
##                         0                         0                         0 
##                   RVOwner             Homeownership          BuysViaMailOrder 
##                         0                         0                         0 
##      RespondsToMailOffers            OptOutMailings               NonUSTravel 
##                         0                         0                         0 
##              OwnsComputer             HasCreditCard            RetentionCalls 
##                         0                         0                         0 
##   RetentionOffersAccepted          NewCellphoneUser       NotNewCellphoneUser 
##                         0                         0                         0 
## ReferralsMadeBySubscriber               IncomeGroup            OwnsMotorcycle 
##                         0                         0                         0 
## AdjustmentsToCreditRating              HandsetPrice   MadeCallToRetentionTeam 
##                         0                         0                         0 
##              CreditRating                 PrizmCode                Occupation 
##                         0                         0                         0 
##             MaritalStatus 
##                         0

A few columns have missing values, but the counts are small relative to the 51,047 rows.

Let's create new variables that will help in the analysis

# Derive two percentage features used in the analysis.
# NOTE(review): rows with a zero denominator yield Inf or NaN (both appear in
# the str()/summary() output recorded in this document); decide whether they
# should be recoded as NA before these columns are aggregated.

# Recurring charge as a percentage of monthly revenue.
Telecom_Data$perc_recurrent_charge <- with(
  Telecom_Data,
  100 * (TotalRecurringCharge / MonthlyRevenue)
)

# Overage minutes as a percentage of monthly minutes.
Telecom_Data$perc_overage_minute <- with(
  Telecom_Data,
  100 * (OverageMinutes / MonthlyMinutes)
)

# Inspect the column types, including the two new columns.
str(Telecom_Data)
## 'data.frame':    51047 obs. of  60 variables:
##  $ CustomerID               : int  3000002 3000010 3000014 3000022 3000026 3000030 3000038 3000042 3000046 3000050 ...
##  $ Churn                    : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 1 1 1 1 1 ...
##  $ MonthlyRevenue           : num  24 17 38 82.3 17.1 ...
##  $ MonthlyMinutes           : int  219 10 8 1312 0 682 26 98 24 1056 ...
##  $ TotalRecurringCharge     : int  22 17 38 75 17 52 30 66 35 75 ...
##  $ DirectorAssistedCalls    : num  0.25 0 0 1.24 0 0.25 0.25 2.48 0 0 ...
##  $ OverageMinutes           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ RoamingCalls             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ PercChangeMinutes        : int  -157 -4 -2 157 0 148 60 24 20 43 ...
##  $ PercChangeRevenues       : num  -19 0 0 8.1 -0.2 -3.1 4 6.8 -0.3 2.4 ...
##  $ DroppedCalls             : num  0.7 0.3 0 52 0 9 0 0 0 0 ...
##  $ BlockedCalls             : num  0.7 0 0 7.7 0 1.7 1 0.3 0 0 ...
##  $ UnansweredCalls          : num  6.3 2.7 0 76 0 13 2.3 4 1 0 ...
##  $ CustomerCareCalls        : num  0 0 0 4.3 0 0.7 0 4 0 0 ...
##  $ ThreewayCalls            : num  0 0 0 1.3 0 0 0 0 0 0 ...
##  $ ReceivedCalls            : num  97.2 0 0.4 200.3 0 ...
##  $ OutboundCalls            : num  0 0 0.3 370.3 0 ...
##  $ InboundCalls             : num  0 0 0 147 0 0 0 0 1.7 0 ...
##  $ PeakCallsInOut           : num  58 5 1.3 555.7 0 ...
##  $ OffPeakCallsInOut        : num  24 1 3.7 303.7 0 ...
##  $ DroppedBlockedCalls      : num  1.3 0.3 0 59.7 0 10.7 1 0.3 0 0 ...
##  $ CallForwardingCalls      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CallWaitingCalls         : num  0.3 0 0 22.7 0 0.7 0 0 0 0 ...
##  $ MonthsInService          : int  61 58 60 59 53 53 57 59 53 55 ...
##  $ UniqueSubs               : int  2 1 1 2 2 1 2 2 3 1 ...
##  $ ActiveSubs               : int  1 1 1 2 2 1 2 2 3 1 ...
##  $ ServiceArea              : chr  "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" ...
##  $ Handsets                 : int  2 2 1 9 4 3 2 3 4 9 ...
##  $ HandsetModels            : int  2 1 1 4 3 2 2 3 3 5 ...
##  $ CurrentEquipmentDays     : int  361 1504 1812 458 852 231 601 464 544 388 ...
##  $ AgeHH1                   : int  62 40 26 30 46 28 52 46 36 46 ...
##  $ AgeHH2                   : int  0 42 26 0 54 0 58 46 34 68 ...
##  $ ChildrenInHH             : chr  "No" "Yes" "Yes" "No" ...
##  $ HandsetRefurbished       : chr  "No" "No" "No" "No" ...
##  $ HandsetWebCapable        : chr  "Yes" "No" "No" "Yes" ...
##  $ TruckOwner               : chr  "No" "No" "No" "No" ...
##  $ RVOwner                  : chr  "No" "No" "No" "No" ...
##  $ Homeownership            : chr  "Known" "Known" "Unknown" "Known" ...
##  $ BuysViaMailOrder         : chr  "Yes" "Yes" "No" "Yes" ...
##  $ RespondsToMailOffers     : chr  "Yes" "Yes" "No" "Yes" ...
##  $ OptOutMailings           : chr  "No" "No" "No" "No" ...
##  $ NonUSTravel              : chr  "No" "No" "No" "No" ...
##  $ OwnsComputer             : chr  "Yes" "Yes" "No" "No" ...
##  $ HasCreditCard            : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ RetentionCalls           : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ RetentionOffersAccepted  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NewCellphoneUser         : chr  "No" "Yes" "Yes" "Yes" ...
##  $ NotNewCellphoneUser      : chr  "No" "No" "No" "No" ...
##  $ ReferralsMadeBySubscriber: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IncomeGroup              : int  4 5 6 6 9 1 9 6 9 5 ...
##  $ OwnsMotorcycle           : chr  "No" "No" "No" "No" ...
##  $ AdjustmentsToCreditRating: int  0 0 0 0 1 1 1 0 0 1 ...
##  $ HandsetPrice             : chr  "30" "30" "Unknown" "10" ...
##  $ MadeCallToRetentionTeam  : chr  "Yes" "No" "No" "No" ...
##  $ CreditRating             : Factor w/ 7 levels "1-Highest","2-High",..: 1 4 3 4 1 3 1 1 1 3 ...
##  $ PrizmCode                : chr  "Suburban" "Suburban" "Town" "Other" ...
##  $ Occupation               : Factor w/ 8 levels "Clerical","Crafts",..: 5 5 2 4 5 4 7 5 4 5 ...
##  $ MaritalStatus            : chr  "No" "Yes" "Yes" "No" ...
##  $ perc_recurrent_charge    : num  91.7 100.1 100 91.2 99.2 ...
##  $ perc_overage_minute      : num  0 0 0 0 NaN 0 0 0 0 0 ...

Let's calculate the churn rate

# Tally the customers in each churn class, largest class first.
churn_counts <- Telecom_Data %>%
  dplyr::count(Churn, sort = TRUE)

Let's plot the churn rate using a pie chart

library("ggplot2")
# Pie chart of the churn counts: one stacked bar (heights taken directly from
# n) wrapped into polar coordinates along the y axis.
ggplot(churn_counts, aes(x = "", y = n, fill = Churn)) +
  geom_col() +
  coord_polar(theta = "y")

Trying out plotly to draw the pie chart as a more interactive graph

library(plotly)
# Slice colours handed to the pie trace's marker.
colors <- c(
  "rgb(211,94,96)",
  "rgb(128,133,133)",
  "rgb(144,103,167)",
  "rgb(171,104,87)",
  "rgb(114,147,203)"
)

# Interactive pie chart of the churn counts; each slice shows its label and
# percentage, and slices are separated by a thin white outline.
fig <- plot_ly(
  type = "pie",
  labels = churn_counts$Churn,
  values = churn_counts$n,
  textinfo = "label+percent",
  insidetextorientation = "radial",
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)
fig

Subsetting the data by churn status for in-depth analysis, and checking the summary of each subset to analyze the trends

library(dplyr)




# Split the customers into two data frames by churn outcome.
Telecom_Data_yes <- Telecom_Data %>% dplyr::filter(Churn == "Yes")
Telecom_Data_no <- Telecom_Data %>% dplyr::filter(Churn == "No")

# Summary statistics for the customers who churned.
summary(Telecom_Data_yes)
##    CustomerID      Churn       MonthlyRevenue MonthlyMinutes
##  Min.   :3000002   No :    0   Min.   :  0    Min.   :   0  
##  1st Qu.:3099298   Yes:14711   1st Qu.: 33    1st Qu.: 132  
##  Median :3195614               Median : 48    Median : 330  
##  Mean   :3194322               Mean   : 58    Mean   : 484  
##  3rd Qu.:3286308               3rd Qu.: 70    3rd Qu.: 667  
##  Max.   :3399978               Max.   :861    Max.   :5410  
##                                NA's   :70     NA's   :70    
##  TotalRecurringCharge DirectorAssistedCalls OverageMinutes  RoamingCalls
##  Min.   :-11          Min.   : 0.0          Min.   :   0   Min.   :  0  
##  1st Qu.: 30          1st Qu.: 0.0          1st Qu.:   0   1st Qu.:  0  
##  Median : 44          Median : 0.2          Median :   4   Median :  0  
##  Mean   : 45          Mean   : 0.8          Mean   :  43   Mean   :  1  
##  3rd Qu.: 55          3rd Qu.: 0.7          3rd Qu.:  46   3rd Qu.:  0  
##  Max.   :338          Max.   :45.8          Max.   :2018   Max.   :851  
##  NA's   :70           NA's   :70            NA's   :70     NA's   :70   
##  PercChangeMinutes PercChangeRevenues  DroppedCalls    BlockedCalls  
##  Min.   :-2868     Min.   :-851       Min.   :  0.0   Min.   :  0.0  
##  1st Qu.: -101     1st Qu.:  -8       1st Qu.:  0.7   1st Qu.:  0.0  
##  Median :  -11     Median :   0       Median :  3.0   Median :  1.0  
##  Mean   :  -25     Mean   :   0       Mean   :  5.8   Mean   :  4.0  
##  3rd Qu.:   54     3rd Qu.:   2       3rd Qu.:  7.3   3rd Qu.:  3.3  
##  Max.   : 5192     Max.   :2484       Max.   :208.7   Max.   :314.7  
##  NA's   :208       NA's   :208                                       
##  UnansweredCalls CustomerCareCalls ThreewayCalls   ReceivedCalls  OutboundCalls
##  Min.   :  0     Min.   :  0.0     Min.   : 0.00   Min.   :   0   Min.   :  0  
##  1st Qu.:  4     1st Qu.:  0.0     1st Qu.: 0.00   1st Qu.:   6   1st Qu.:  2  
##  Median : 15     Median :  0.0     Median : 0.00   Median :  45   Median : 12  
##  Mean   : 26     Mean   :  1.6     Mean   : 0.26   Mean   : 105   Mean   : 24  
##  3rd Qu.: 34     3rd Qu.:  1.3     3rd Qu.: 0.30   3rd Qu.: 140   3rd Qu.: 32  
##  Max.   :849     Max.   :172.3     Max.   :30.00   Max.   :2619   Max.   :520  
##                                                                                
##   InboundCalls   PeakCallsInOut OffPeakCallsInOut DroppedBlockedCalls
##  Min.   :  0.0   Min.   :   0   Min.   :   0      Min.   :  0        
##  1st Qu.:  0.0   1st Qu.:  19   1st Qu.:   9      1st Qu.:  2        
##  Median :  1.7   Median :  58   Median :  31      Median :  5        
##  Mean   :  7.3   Mean   :  84   Mean   :  62      Mean   : 10        
##  3rd Qu.:  8.0   3rd Qu.: 114   3rd Qu.:  80      3rd Qu.: 12        
##  Max.   :298.3   Max.   :1359   Max.   :1314      Max.   :329        
##                                                                      
##  CallForwardingCalls CallWaitingCalls MonthsInService   UniqueSubs   
##  Min.   : 0.0        Min.   :  0.0    Min.   : 6      Min.   :  1.0  
##  1st Qu.: 0.0        1st Qu.:  0.0    1st Qu.:12      1st Qu.:  1.0  
##  Median : 0.0        Median :  0.0    Median :17      Median :  1.0  
##  Mean   : 0.0        Mean   :  1.6    Mean   :19      Mean   :  1.6  
##  3rd Qu.: 0.0        3rd Qu.:  1.3    3rd Qu.:24      3rd Qu.:  2.0  
##  Max.   :33.7        Max.   :135.7    Max.   :61      Max.   :196.0  
##                                                                      
##    ActiveSubs   ServiceArea           Handsets     HandsetModels 
##  Min.   : 0.0   Length:14711       Min.   : 1.00   Min.   : 1.0  
##  1st Qu.: 1.0   Class :character   1st Qu.: 1.00   1st Qu.: 1.0  
##  Median : 1.0   Mode  :character   Median : 1.00   Median : 1.0  
##  Mean   : 1.4                      Mean   : 1.74   Mean   : 1.5  
##  3rd Qu.: 2.0                      3rd Qu.: 2.00   3rd Qu.: 2.0  
##  Max.   :53.0                      Max.   :22.00   Max.   :14.0  
##                                                                  
##  CurrentEquipmentDays     AgeHH1         AgeHH2     ChildrenInHH      
##  Min.   :  -4         Min.   : 0.0   Min.   : 0.0   Length:14711      
##  1st Qu.: 249         1st Qu.: 0.0   1st Qu.: 0.0   Class :character  
##  Median : 366         Median :34.0   Median : 0.0   Mode  :character  
##  Mean   : 422         Mean   :30.3   Mean   :20.4                     
##  3rd Qu.: 564         3rd Qu.:48.0   3rd Qu.:42.0                     
##  Max.   :1779         Max.   :98.0   Max.   :99.0                     
##                       NA's   :249    NA's   :249                      
##  HandsetRefurbished HandsetWebCapable   TruckOwner          RVOwner         
##  Length:14711       Length:14711       Length:14711       Length:14711      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Homeownership      BuysViaMailOrder   RespondsToMailOffers OptOutMailings    
##  Length:14711       Length:14711       Length:14711         Length:14711      
##  Class :character   Class :character   Class :character     Class :character  
##  Mode  :character   Mode  :character   Mode  :character     Mode  :character  
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  NonUSTravel        OwnsComputer       HasCreditCard      RetentionCalls
##  Length:14711       Length:14711       Length:14711       Min.   :0.00  
##  Class :character   Class :character   Class :character   1st Qu.:0.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :0.00  
##                                                           Mean   :0.06  
##                                                           3rd Qu.:0.00  
##                                                           Max.   :4.00  
##                                                                         
##  RetentionOffersAccepted NewCellphoneUser   NotNewCellphoneUser
##  Min.   :0.000           Length:14711       Length:14711       
##  1st Qu.:0.000           Class :character   Class :character   
##  Median :0.000           Mode  :character   Mode  :character   
##  Mean   :0.026                                                 
##  3rd Qu.:0.000                                                 
##  Max.   :3.000                                                 
##                                                                
##  ReferralsMadeBySubscriber  IncomeGroup   OwnsMotorcycle    
##  Min.   :0.00              Min.   :0.00   Length:14711      
##  1st Qu.:0.00              1st Qu.:0.00   Class :character  
##  Median :0.00              Median :5.00   Mode  :character  
##  Mean   :0.05              Mean   :4.26                     
##  3rd Qu.:0.00              3rd Qu.:7.00                     
##  Max.   :9.00              Max.   :9.00                     
##                                                             
##  AdjustmentsToCreditRating HandsetPrice       MadeCallToRetentionTeam
##  Min.   :0.00              Length:14711       Length:14711           
##  1st Qu.:0.00              Class :character   Class :character       
##  Median :0.00              Mode  :character   Mode  :character       
##  Mean   :0.04                                                        
##  3rd Qu.:0.00                                                        
##  Max.   :9.00                                                        
##                                                                      
##     CreditRating   PrizmCode                Occupation    MaritalStatus     
##  1-Highest:2628   Length:14711       Other       :10932   Length:14711      
##  2-High   :5712   Class :character   Professional: 2467   Class :character  
##  3-Good   :2608   Mode  :character   Crafts      :  426   Mode  :character  
##  4-Medium :1399                      Clerical    :  289                     
##  5-Low    :1436                      Self        :  243                     
##  6-VeryLow: 316                      Retired     :  185                     
##  7-Lowest : 612                      (Other)     :  169                     
##  perc_recurrent_charge perc_overage_minute
##  Min.   :-30           Min.   :  0        
##  1st Qu.: 69           1st Qu.:  0        
##  Median : 94           Median :  1        
##  Mean   : 88           Mean   :  7        
##  3rd Qu.:106           3rd Qu.: 10        
##  Max.   :514           Max.   :100        
##  NA's   :72            NA's   :512
# Summary statistics for the customers who did not churn.
summary(Telecom_Data_no)
##    CustomerID      Churn       MonthlyRevenue MonthlyMinutes
##  Min.   :3000014   No :36336   Min.   :  -6   Min.   :   0  
##  1st Qu.:3101025   Yes:    0   1st Qu.:  34   1st Qu.: 170  
##  Median :3204388               Median :  49   Median : 381  
##  Mean   :3205048               Mean   :  59   Mean   : 543  
##  3rd Qu.:3313601               3rd Qu.:  72   3rd Qu.: 743  
##  Max.   :3399994               Max.   :1223   Max.   :7359  
##                                NA's   :86     NA's   :86    
##  TotalRecurringCharge DirectorAssistedCalls OverageMinutes  RoamingCalls 
##  Min.   : -9          Min.   :  0.0         Min.   :   0   Min.   :   0  
##  1st Qu.: 30          1st Qu.:  0.0         1st Qu.:   0   1st Qu.:   0  
##  Median : 45          Median :  0.2         Median :   2   Median :   0  
##  Mean   : 48          Mean   :  0.9         Mean   :  39   Mean   :   1  
##  3rd Qu.: 60          3rd Qu.:  1.0         3rd Qu.:  39   3rd Qu.:   0  
##  Max.   :400          Max.   :159.4         Max.   :4321   Max.   :1112  
##  NA's   :86           NA's   :86            NA's   :86     NA's   :86    
##  PercChangeMinutes PercChangeRevenues  DroppedCalls    BlockedCalls
##  Min.   :-3875     Min.   :-1108      Min.   :  0.0   Min.   :  0  
##  1st Qu.:  -78     1st Qu.:   -7      1st Qu.:  1.0   1st Qu.:  0  
##  Median :   -3     Median :    0      Median :  3.0   Median :  1  
##  Mean   :   -6     Mean   :   -1      Mean   :  6.1   Mean   :  4  
##  3rd Qu.:   70     3rd Qu.:    2      3rd Qu.:  7.7   3rd Qu.:  4  
##  Max.   : 4480     Max.   : 1347      Max.   :221.7   Max.   :384  
##  NA's   :159       NA's   :159                                     
##  UnansweredCalls CustomerCareCalls ThreewayCalls  ReceivedCalls  OutboundCalls
##  Min.   :  0     Min.   :  0       Min.   : 0.0   Min.   :   0   Min.   :  0  
##  1st Qu.:  6     1st Qu.:  0       1st Qu.: 0.0   1st Qu.:  10   1st Qu.:  4  
##  Median : 17     Median :  0       Median : 0.0   Median :  56   Median : 14  
##  Mean   : 29     Mean   :  2       Mean   : 0.3   Mean   : 119   Mean   : 26  
##  3rd Qu.: 37     3rd Qu.:  2       3rd Qu.: 0.3   3rd Qu.: 159   3rd Qu.: 35  
##  Max.   :840     Max.   :327       Max.   :66.0   Max.   :2692   Max.   :644  
##                                                                               
##   InboundCalls PeakCallsInOut OffPeakCallsInOut DroppedBlockedCalls
##  Min.   :  0   Min.   :   0   Min.   :   0      Min.   :  0        
##  1st Qu.:  0   1st Qu.:  25   1st Qu.:  12      1st Qu.:  2        
##  Median :  2   Median :  64   Median :  38      Median :  6        
##  Mean   :  9   Mean   :  93   Mean   :  70      Mean   : 10        
##  3rd Qu.: 10   3rd Qu.: 124   3rd Qu.:  92      3rd Qu.: 13        
##  Max.   :519   Max.   :2091   Max.   :1475      Max.   :412        
##                                                                    
##  CallForwardingCalls CallWaitingCalls MonthsInService   UniqueSubs   
##  Min.   : 0.0        Min.   :  0.0    Min.   : 6.0    Min.   : 1.00  
##  1st Qu.: 0.0        1st Qu.:  0.0    1st Qu.:11.0    1st Qu.: 1.00  
##  Median : 0.0        Median :  0.3    Median :16.0    Median : 1.00  
##  Mean   : 0.0        Mean   :  1.9    Mean   :18.6    Mean   : 1.51  
##  3rd Qu.: 0.0        3rd Qu.:  1.7    3rd Qu.:24.0    3rd Qu.: 2.00  
##  Max.   :81.3        Max.   :212.7    Max.   :60.0    Max.   :12.00  
##                                                                      
##    ActiveSubs    ServiceArea           Handsets     HandsetModels  
##  Min.   : 0.00   Length:36336       Min.   : 1.00   Min.   : 1.00  
##  1st Qu.: 1.00   Class :character   1st Qu.: 1.00   1st Qu.: 1.00  
##  Median : 1.00   Mode  :character   Median : 1.00   Median : 1.00  
##  Mean   : 1.35                      Mean   : 1.83   Mean   : 1.58  
##  3rd Qu.: 2.00                      3rd Qu.: 2.00   3rd Qu.: 2.00  
##  Max.   :11.00                      Max.   :24.00   Max.   :15.00  
##                                     NA's   :1       NA's   :1      
##  CurrentEquipmentDays     AgeHH1        AgeHH2    ChildrenInHH      
##  Min.   :  -5         Min.   : 0    Min.   : 0    Length:36336      
##  1st Qu.: 197         1st Qu.: 0    1st Qu.: 0    Class :character  
##  Median : 310         Median :36    Median : 0    Mode  :character  
##  Mean   : 364         Mean   :32    Mean   :21                      
##  3rd Qu.: 493         3rd Qu.:48    3rd Qu.:44                      
##  Max.   :1812         Max.   :99    Max.   :98                      
##  NA's   :1            NA's   :660   NA's   :660                     
##  HandsetRefurbished HandsetWebCapable   TruckOwner          RVOwner         
##  Length:36336       Length:36336       Length:36336       Length:36336      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Homeownership      BuysViaMailOrder   RespondsToMailOffers OptOutMailings    
##  Length:36336       Length:36336       Length:36336         Length:36336      
##  Class :character   Class :character   Class :character     Class :character  
##  Mode  :character   Mode  :character   Mode  :character     Mode  :character  
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  NonUSTravel        OwnsComputer       HasCreditCard      RetentionCalls 
##  Length:36336       Length:36336       Length:36336       Min.   :0.000  
##  Class :character   Class :character   Class :character   1st Qu.:0.000  
##  Mode  :character   Mode  :character   Mode  :character   Median :0.000  
##                                                           Mean   :0.029  
##                                                           3rd Qu.:0.000  
##                                                           Max.   :3.000  
##                                                                          
##  RetentionOffersAccepted NewCellphoneUser   NotNewCellphoneUser
##  Min.   :0.000           Length:36336       Length:36336       
##  1st Qu.:0.000           Class :character   Class :character   
##  Median :0.000           Mode  :character   Mode  :character   
##  Mean   :0.015                                                 
##  3rd Qu.:0.000                                                 
##  Max.   :3.000                                                 
##                                                                
##  ReferralsMadeBySubscriber  IncomeGroup   OwnsMotorcycle    
##  Min.   : 0.0              Min.   :0.00   Length:36336      
##  1st Qu.: 0.0              1st Qu.:1.00   Class :character  
##  Median : 0.0              Median :5.00   Mode  :character  
##  Mean   : 0.1              Mean   :4.35                     
##  3rd Qu.: 0.0              3rd Qu.:7.00                     
##  Max.   :35.0              Max.   :9.00                     
##                                                             
##  AdjustmentsToCreditRating HandsetPrice       MadeCallToRetentionTeam
##  Min.   : 0.00             Length:36336       Length:36336           
##  1st Qu.: 0.00             Class :character   Class :character       
##  Median : 0.00             Mode  :character   Mode  :character       
##  Mean   : 0.06                                                       
##  3rd Qu.: 0.00                                                       
##  Max.   :25.00                                                       
##                                                                      
##     CreditRating    PrizmCode                Occupation    MaritalStatus     
##  1-Highest: 5894   Length:36336       Other       :26705   Length:36336      
##  2-High   :13281   Class :character   Professional: 6288   Class :character  
##  3-Good   : 5802   Mode  :character   Crafts      : 1093   Mode  :character  
##  4-Medium : 3958                      Clerical    :  697                     
##  5-Low    : 5063                      Self        :  636                     
##  6-VeryLow:  836                      Retired     :  548                     
##  7-Lowest : 1502                      (Other)     :  369                     
##  perc_recurrent_charge perc_overage_minute
##  Min.   :-28.1         Min.   :  0        
##  1st Qu.: 74.0         1st Qu.:  0        
##  Median : 96.1         Median :  1        
##  Mean   :  Inf         Mean   :  6        
##  3rd Qu.:110.4         3rd Qu.:  8        
##  Max.   :  Inf         Max.   :100        
##  NA's   :89            NA's   :367

Data Classification

# Load the table of feature counts per variable type.
feat_typ_counts <- read.csv("Feat_type_counts.csv")
#install.packages("plotrix")
library(plotrix)

library("ggplot2")
#pie(feat_typ_counts$Counts, feat_typ_counts$Variable.Type)

# Percentage share of each variable type, rounded to one decimal place.
piepercent <- with(feat_typ_counts, round(100 * Counts / sum(Counts), 1))

# Geometry for a donut chart, one rectangle per variable type:
#   fraction      - share of the total count
#   ymax / ymin   - cumulative top and bottom of the rectangle
#   labelPosition - midpoint of the rectangle, where the label is placed
#   label         - variable type plus its count
feat_typ_counts <- dplyr::mutate(
  feat_typ_counts,
  fraction = Counts / sum(Counts),
  ymax = cumsum(fraction),
  ymin = c(0, head(ymax, n = -1)),
  labelPosition = (ymax + ymin) / 2,
  label = paste0(Variable.Type, "\n Count: ", Counts)
)


ggplot(feat_typ_counts, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=Variable.Type)) +
  geom_rect() +
  geom_label( x=3.5, aes(y=labelPosition, label=label), size=2) +
  scale_fill_brewer(palette=4) +
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none")

Getting summary of the data

# Print a summary-statistics table covering every column of Telecom_Data.
# NOTE(review): xkablesummary is not defined in the visible part of this
# file; presumably it comes from an attached helper package -- confirm.
xkablesummary(Telecom_Data)
Table: Statistics summary.
CustomerID Churn MonthlyRevenue MonthlyMinutes TotalRecurringCharge DirectorAssistedCalls OverageMinutes RoamingCalls PercChangeMinutes PercChangeRevenues DroppedCalls BlockedCalls UnansweredCalls CustomerCareCalls ThreewayCalls ReceivedCalls OutboundCalls InboundCalls PeakCallsInOut OffPeakCallsInOut DroppedBlockedCalls CallForwardingCalls CallWaitingCalls MonthsInService UniqueSubs ActiveSubs ServiceArea Handsets HandsetModels CurrentEquipmentDays AgeHH1 AgeHH2 ChildrenInHH HandsetRefurbished HandsetWebCapable TruckOwner RVOwner Homeownership BuysViaMailOrder RespondsToMailOffers OptOutMailings NonUSTravel OwnsComputer HasCreditCard RetentionCalls RetentionOffersAccepted NewCellphoneUser NotNewCellphoneUser ReferralsMadeBySubscriber IncomeGroup OwnsMotorcycle AdjustmentsToCreditRating HandsetPrice MadeCallToRetentionTeam CreditRating PrizmCode Occupation MaritalStatus perc_recurrent_charge perc_overage_minute
Min Min. :3000002 No :36336 Min. : -6 Min. : 0 Min. :-11 Min. : 0.0 Min. : 0 Min. : 0 Min. :-3875 Min. :-1108 Min. : 0.0 Min. : 0 Min. : 0 Min. : 0 Min. : 0.0 Min. : 0 Min. : 0 Min. : 0 Min. : 0 Min. : 0 Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 6.0 Min. : 1.0 Min. : 0.0 Length:51047 Min. : 1.00 Min. : 1.00 Min. : -5 Min. : 0 Min. : 0 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Length:51047 Min. :0.00 Min. :0.000 Length:51047 Length:51047 Min. : 0.0 Min. :0.00 Length:51047 Min. : 0.00 Length:51047 Length:51047 1-Highest: 8522 Length:51047 Other :37637 Length:51047 Min. :-29.5 Min. : 0
Q1 1st Qu.:3100632 Yes:14711 1st Qu.: 34 1st Qu.: 158 1st Qu.: 30 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0 1st Qu.: -83 1st Qu.: -7 1st Qu.: 0.7 1st Qu.: 0 1st Qu.: 5 1st Qu.: 0 1st Qu.: 0.0 1st Qu.: 8 1st Qu.: 3 1st Qu.: 0 1st Qu.: 23 1st Qu.: 11 1st Qu.: 2 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.:11.0 1st Qu.: 1.0 1st Qu.: 1.0 Class :character 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 205 1st Qu.: 0 1st Qu.: 0 Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character 1st Qu.:0.00 1st Qu.:0.000 Class :character Class :character 1st Qu.: 0.0 1st Qu.:0.00 Class :character 1st Qu.: 0.00 Class :character Class :character 2-High :18993 Class :character Professional: 8755 Class :character 1st Qu.: 72.6 1st Qu.: 0
Median Median :3201534 NA Median : 48 Median : 366 Median : 45 Median : 0.2 Median : 3 Median : 0 Median : -5 Median : 0 Median : 3.0 Median : 1 Median : 16 Median : 0 Median : 0.0 Median : 53 Median : 14 Median : 2 Median : 62 Median : 36 Median : 5 Median : 0.0 Median : 0.3 Median :16.0 Median : 1.0 Median : 1.0 Mode :character Median : 1.00 Median : 1.00 Median : 329 Median :36 Median : 0 Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Median :0.00 Median :0.000 Mode :character Mode :character Median : 0.0 Median :5.00 Mode :character Median : 0.00 Mode :character Mode :character 3-Good : 8410 Mode :character Crafts : 1519 Mode :character Median : 95.4 Median : 1
Mean Mean :3201957 NA Mean : 59 Mean : 526 Mean : 47 Mean : 0.9 Mean : 40 Mean : 1 Mean : -12 Mean : -1 Mean : 6.0 Mean : 4 Mean : 28 Mean : 2 Mean : 0.3 Mean : 115 Mean : 25 Mean : 8 Mean : 91 Mean : 68 Mean : 10 Mean : 0.0 Mean : 1.8 Mean :18.8 Mean : 1.5 Mean : 1.4 NA Mean : 1.81 Mean : 1.56 Mean : 381 Mean :31 Mean :21 NA NA NA NA NA NA NA NA NA NA NA NA Mean :0.04 Mean :0.018 NA NA Mean : 0.1 Mean :4.32 NA Mean : 0.05 NA NA 4-Medium : 5357 NA Clerical : 986 NA Mean : Inf Mean : 6
Q3 3rd Qu.:3305376 NA 3rd Qu.: 71 3rd Qu.: 723 3rd Qu.: 60 3rd Qu.: 1.0 3rd Qu.: 41 3rd Qu.: 0 3rd Qu.: 66 3rd Qu.: 2 3rd Qu.: 7.7 3rd Qu.: 4 3rd Qu.: 36 3rd Qu.: 2 3rd Qu.: 0.3 3rd Qu.: 154 3rd Qu.: 34 3rd Qu.: 9 3rd Qu.: 121 3rd Qu.: 89 3rd Qu.: 12 3rd Qu.: 0.0 3rd Qu.: 1.3 3rd Qu.:24.0 3rd Qu.: 2.0 3rd Qu.: 2.0 NA 3rd Qu.: 2.00 3rd Qu.: 2.00 3rd Qu.: 515 3rd Qu.:48 3rd Qu.:42 NA NA NA NA NA NA NA NA NA NA NA NA 3rd Qu.:0.00 3rd Qu.:0.000 NA NA 3rd Qu.: 0.0 3rd Qu.:7.00 NA 3rd Qu.: 0.00 NA NA 5-Low : 6499 NA Self : 879 NA 3rd Qu.:109.3 3rd Qu.: 8
Max Max. :3399994 NA Max. :1223 Max. :7359 Max. :400 Max. :159.4 Max. :4321 Max. :1112 Max. : 5192 Max. : 2484 Max. :221.7 Max. :384 Max. :849 Max. :327 Max. :66.0 Max. :2692 Max. :644 Max. :519 Max. :2091 Max. :1475 Max. :412 Max. :81.3 Max. :212.7 Max. :61.0 Max. :196.0 Max. :53.0 NA Max. :24.00 Max. :15.00 Max. :1812 Max. :99 Max. :99 NA NA NA NA NA NA NA NA NA NA NA NA Max. :4.00 Max. :3.000 NA NA Max. :35.0 Max. :9.00 NA Max. :25.00 NA NA 6-VeryLow: 1152 NA Retired : 733 NA Max. : Inf Max. :100
NA NA NA NA’s :156 NA’s :156 NA’s :156 NA’s :156 NA’s :156 NA’s :156 NA’s :367 NA’s :367 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA’s :1 NA’s :1 NA’s :1 NA’s :909 NA’s :909 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 7-Lowest : 2114 NA (Other) : 538 NA NA’s :161 NA’s :879

Box plot of the Monthly Minutes

# Horizontal, notched box plot of monthly minutes across all customers.
# With horizontal = TRUE the measured values run along the x-axis, so the
# x label names the variable; the y-axis carries no scale and is therefore
# left unlabeled (it does not show a frequency).
boxplot(
  Telecom_Data$MonthlyMinutes,
  main = "Monthly Minutes of Customers",
  xlab = "Monthly Minutes",
  ylab = "",
  col = "orange",
  border = "brown",
  horizontal = TRUE,
  notch = TRUE
)

## Current Handset use in days

# Box plot of CurrentEquipmentDays, one box per Churn level.
# Columns are mapped with formulas (~) so plotly resolves them from
# Telecom_Data, and calls are namespaced so the chunk does not depend on
# library(plotly) having been attached earlier.
# The y-axis shows the measured values (not counts), so it is titled after
# the variable.
plotly::plot_ly(
  Telecom_Data,
  y = ~CurrentEquipmentDays,
  color = ~Churn,
  type = "box"
) %>%
  plotly::layout(
    boxmode = "group",
    xaxis = list(title = ""),
    yaxis = list(title = "Current Equipment Days")
  )

Boxplot of Total Recurring Charge

# Box plot of TotalRecurringCharge, one box per Churn level.
# Columns are mapped with formulas (~) so plotly resolves them from
# Telecom_Data, and calls are namespaced so the chunk does not depend on
# library(plotly) having been attached earlier.
# The y-axis shows the measured values (not counts), so it is titled after
# the variable.
plotly::plot_ly(
  Telecom_Data,
  y = ~TotalRecurringCharge,
  color = ~Churn,
  type = "box"
) %>%
  plotly::layout(
    boxmode = "group",
    xaxis = list(title = ""),
    yaxis = list(title = "Total Recurring Charge")
  )

Box plot of Months in Service

# Box plot of MonthsInService, one box per Churn level.
# Columns are mapped with formulas (~) so plotly resolves them from
# Telecom_Data, and calls are namespaced so the chunk does not depend on
# library(plotly) having been attached earlier.
# The y-axis shows the measured values (not counts), so it is titled after
# the variable.
plotly::plot_ly(
  Telecom_Data,
  y = ~MonthsInService,
  color = ~Churn,
  type = "box"
) %>%
  plotly::layout(
    boxmode = "group",
    xaxis = list(title = ""),
    yaxis = list(title = "Months in Service")
  )

Box plot of the Percent change in recurrent charge

# Box plot of perc_recurrent_charge, one box per Churn level.
# The column summary reports Inf and NA values for perc_recurrent_charge,
# so only rows with a finite value are plotted; non-finite entries cannot
# be placed on the axis.
# Columns are mapped with formulas (~), and calls are namespaced so the
# chunk does not depend on library(plotly) having been attached earlier.
# The y-axis shows the measured values (not counts), so it is titled after
# the variable.
plotly::plot_ly(
  Telecom_Data[is.finite(Telecom_Data$perc_recurrent_charge), ],
  y = ~perc_recurrent_charge,
  color = ~Churn,
  type = "box"
) %>%
  plotly::layout(
    boxmode = "group",
    xaxis = list(title = ""),
    yaxis = list(title = "Recurrent Charge (%)")
  )

Box plot of Percent change in Minutes

# Box plot of PercChangeMinutes, one box per Churn level.
# Columns are mapped with formulas (~) so plotly resolves them from
# Telecom_Data, and calls are namespaced so the chunk does not depend on
# library(plotly) having been attached earlier.
# The y-axis shows the measured values (not counts), so it is titled after
# the variable.
plotly::plot_ly(
  Telecom_Data,
  y = ~PercChangeMinutes,
  color = ~Churn,
  type = "box"
) %>%
  plotly::layout(
    boxmode = "group",
    xaxis = list(title = ""),
    yaxis = list(title = "Percent Change in Minutes")
  )

Box plot of Percent change in Revenues

# Box plot of PercChangeRevenues, one box per Churn level.
# Columns are mapped with formulas (~) so plotly resolves them from
# Telecom_Data, and calls are namespaced so the chunk does not depend on
# library(plotly) having been attached earlier.
# The y-axis shows the measured values (not counts), so it is titled after
# the variable.
plotly::plot_ly(
  Telecom_Data,
  y = ~PercChangeRevenues,
  color = ~Churn,
  type = "box"
) %>%
  plotly::layout(
    boxmode = "group",
    xaxis = list(title = ""),
    yaxis = list(title = "Percent Change in Revenues")
  )

Distribution of the Monthly Revenue

library(ggplot2)
library(plotly)

# Nothing in this chunk draws random numbers; the seed is kept only so the
# RNG state seen by later chunks is unchanged.
set.seed(1)

# Density-scaled histogram of MonthlyRevenue with a kernel density curve and
# a rug of the individual observations. after_stat(density) replaces the
# deprecated ..density.. notation, and na.rm = TRUE drops rows with a
# missing MonthlyRevenue quietly instead of warning once per layer.
gg <- ggplot(Telecom_Data, aes(x = MonthlyRevenue, color = "density")) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 7,
    fill = "#67B7D1",
    alpha = 0.5,
    na.rm = TRUE
  ) +
  geom_density(color = "#67B7D1", na.rm = TRUE) +
  geom_rug(color = "#67B7D1", na.rm = TRUE) +
  ylab("") +
  xlab("") +
  theme(legend.title = element_blank()) +
  scale_color_manual(values = c("density" = "#67B7D1"))

# Convert to an interactive plot. The x-axis carries the revenue values and
# the y-axis the estimated density, so the axis titles say exactly that.
ggplotly(gg) %>%
  layout(
    plot_bgcolor = "#e5ecf6",
    xaxis = list(
      title = "Monthly Revenue",
      zerolinecolor = "#ffffff",
      zerolinewidth = 2,
      gridcolor = "#ffffff"
    ),
    yaxis = list(
      title = "Density",
      zerolinecolor = "#ffffff",
      zerolinewidth = 2,
      gridcolor = "#ffffff"
    )
  )

Distribution of Monthly Minutes

library(ggplot2)
library(plotly)

# Nothing in this chunk draws random numbers; the seed is kept only so the
# RNG state seen by later chunks is unchanged.
set.seed(1)

# Density-scaled histogram of MonthlyMinutes with a kernel density curve and
# a rug of the individual observations. after_stat(density) replaces the
# deprecated ..density.. notation, and na.rm = TRUE drops rows with a
# missing MonthlyMinutes quietly instead of warning once per layer.
gg <- ggplot(Telecom_Data, aes(x = MonthlyMinutes, color = "density")) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 7,
    fill = "#67B7D1",
    alpha = 0.5,
    na.rm = TRUE
  ) +
  geom_density(color = "#67B7D1", na.rm = TRUE) +
  geom_rug(color = "#67B7D1", na.rm = TRUE) +
  ylab("") +
  xlab("") +
  theme(legend.title = element_blank()) +
  scale_color_manual(values = c("density" = "#67B7D1"))

# Convert to an interactive plot. The histogram is density-scaled, so the
# y-axis is titled "Density" rather than "Frequency".
ggplotly(gg) %>%
  layout(
    plot_bgcolor = "#e5ecf6",
    xaxis = list(
      title = "Monthly Minutes",
      zerolinecolor = "#ffffff",
      zerolinewidth = 2,
      gridcolor = "#ffffff"
    ),
    yaxis = list(
      title = "Density",
      zerolinecolor = "#ffffff",
      zerolinewidth = 2,
      gridcolor = "#ffffff"
    )
  )
# Normal Q-Q plot of MonthlyMinutes with a red reference line drawn on top.
with(Telecom_Data, {
  qqnorm(MonthlyMinutes)
  qqline(MonthlyMinutes, col = "red")
})

#install.packages("car")
#library("car")
#qqPlot(Telecom_Data$MonthlyMinutes)
library("plotly")
#plot_ly(Telecom_Data, y= Telecom_Data$AgeHH1, color = Telecom_Data$Churn, type = "box") 
         #layout(boxmode = "group", 
        # xaxis = list(title=''), 
        # yaxis = list(title='Frequency'))

itgi